pacman::p_load(tidyverse, data.table)
rm(list=ls())
################################################################################ 

####################### Spatial units
df = fread("../../data/landuse/clean/geographicunits_brazil.csv.gz")
df_u_b = df[, list(county_id, amc_id, microregion_id, mesoregion_id, state_id, state, region)]
df = fread("../../data/landuse/clean/geographicunits_argentina.csv.gz")
df_u_a = df[, list(county_id, state_id, state, region)]


####################### Brazil beef
folder = "../../data/agribusiness/raw/brazil-beef-v2.2.0-2024-01-17"
if(!dir.exists(folder)) {unzip(paste0(folder,".zip"), exdir = folder)}
df_m = fread(paste0(folder,"/brazil-beef-v2.2.0-2024-01-17.csv"))
unlink(folder, recursive=TRUE)

#Rename columns
df = df_m
setnames(df, c('year','country_of_production','municipality_of_production_trase_id','municipality_of_production','state_of_production','volume','fob','logistics_hub','exporter','exporter_group','importer','importer_group','country_of_destination','product_type'),
         c('year','country','county_id','county','state','equivalent_tonnes','fob_usd','hub','exporter','exporter_group','importer','importer_group','destination','product_type'))
df[,port:='UNKNOWN']
df = df[, list(year,country,state,county,county_id,equivalent_tonnes,fob_usd,hub,port,exporter,exporter_group,importer,importer_group,destination,product_type) ]
df[, commodity:='beef']

#Format county ID
df[,id_type:='county_known']
df = df %>% mutate(county_id = str_replace(county_id, "BR-", "BR"))
df[county_id=="BRXXXXXXX", id_type:='county_unknown_state_unknown']
df[like(county_id,"AGGREGATED"), id_type:='county_aggregated_state_known']
share_county_known_fob = sum(df[id_type=='county_known',]$fob_usd)/sum(df$fob_usd)
share_county_known_vol = sum(df[id_type=='county_known',]$equivalent_tonnes)/sum(df$equivalent_tonnes)

#Format firm names
df[like(exporter_group,"JBS"), exporter_group:='JBS']
df[like(exporter_group,"MINERVA"), exporter_group:='MINERVA']
df[like(exporter_group,"MARFRIG"), exporter_group:='MARFRIG']
df[like(exporter_group,"MERCURIO"), exporter_group:='MERCURIO']
df[like(exporter_group,"MATABOI"), exporter_group:='MATABOI ALIMENTOS']
df[like(exporter_group,"FRIGOL"), exporter_group:='FRIGOL']

#Merge with other spatial units
dff = merge(df_u_b, df, by=c('state','county_id'), all.y=T)
share_non_merged = sum(dff[is.na(amc_id) & id_type=='county_known',]$equivalent_tonnes)/sum(dff$equivalent_tonnes)
dff[id_type %in% c('county_aggregated_state_known'), state_id:=county_id]
dff = dff %>% mutate(state_id = str_replace(state_id, "-AGGREGATED", ""))
dff = dff %>% mutate(state_id = str_replace(state_id, "XXXXX", ""))

df_u_bb = unique(df_u_b[, list(state, state_id, region)])
setnames(df_u_bb, old=c('state_id','region'), new=c('state_id_b','region_b'))
dff = merge(dff, df_u_bb, by=c('state'), all.x=T)
dff$state_id = dff$state_id_b
dff$region = dff$region_b
dff[, c('state_id_b','region_b') := c(NULL,NULL)]

#Firm names
names_x = data.frame(unique(dff$exporter))
names_xg = data.frame(unique(dff$exporter_group))
names_m = data.frame(unique(dff$importer))
names_mg = data.frame(unique(dff$importer_group))

#Other edits
dff[exporter=="",]$exporter='UNKNOWN'
dff[exporter_group=="",]$exporter_group='UNKNOWN'
dff[importer=="",]$importer='UNKNOWN'
dff[importer_group=="",]$importer_group='UNKNOWN'
dff[port=="",]$port='UNKNOWN'
write.csv(dff, gzfile(paste0("../../data/agribusiness/clean/beef_brazil.csv.gz")), row.names = FALSE)

#Export destination list
dfn = unique(dff, by = "destination")[, list(destination)]
write.csv(dfn, paste0("../../data/agribusiness/clean/beef_brazil_destinations.csv"), row.names = FALSE)


####################### Brazil soybean
folder = "../../data/agribusiness/raw/brazil-soy-v2.6.0-2024-01-17"
if(!dir.exists(folder)) {unzip(paste0(folder,".zip"), exdir = folder)}
df_m = fread(paste0(folder,"/brazil-soy-v2.6.0-2024-01-17.csv"))
unlink(folder, recursive=TRUE)

#Rename columns
df = df_m
setnames(df, c('year','country_of_production','municipality_of_production_trase_id','municipality_of_production','state','volume','fob','logistics_hub','port_of_export','exporter','exporter_group','importer','importer_group','country_of_first_import'),
         c('year','country','county_id','county','state','equivalent_tonnes','fob_usd','hub','port','exporter','exporter_group','importer','importer_group','destination'))
df$product_type = 'Soy bean equivalents'
df = df[, list(year,country,state,county,county_id,equivalent_tonnes,fob_usd,hub,port,exporter,exporter_group,importer,importer_group,destination,product_type) ]
df[, commodity:='soybean']

#Format county ID
df[,id_type:='county_known']
df = df %>% mutate(county_id = str_replace(county_id, "BR-", "BR"))
df[county_id=="BRXXXXXXX", id_type:='county_unknown_state_unknown']
share_county_known_fob = sum(df[id_type=='county_known',]$fob_usd)/sum(df$fob_usd)
share_county_known_vol = sum(df[id_type=='county_known',]$equivalent_tonnes)/sum(df$equivalent_tonnes)

#Format firm names
df[like(exporter_group,"BUNGE"), exporter_group:='BUNGE']
df[like(exporter_group,"CARGILL"), exporter_group:='CARGILL']
df[like(exporter_group,"AMAGGI"), exporter_group:='AMAGGI']
df[like(exporter_group,"LOUIS DREYFUS"), exporter_group:='LOUIS DREYFUS']
df[like(exporter_group,"COFCO"), exporter_group:='COFCO']
df[like(exporter_group,"BIANCHINI"), exporter_group:='BIANCHINI']

#Merge with other spatial units
dff = merge(df_u_b, df, by=c('state','county_id'), all.y=T)
share_non_merged = sum(dff[is.na(amc_id) & id_type=='county_known',]$equivalent_tonnes)/sum(dff$equivalent_tonnes)
df_u_bb = unique(df_u_b[, list(state, state_id, region)])
setnames(df_u_bb, old=c('state_id','region'), new=c('state_id_b','region_b'))
dff = merge(dff, df_u_bb, by=c('state'), all.x=T)
dff$state_id = dff$state_id_b
dff$region = dff$region_b
dff[, c('state_id_b','region_b') := c(NULL,NULL)]

#Firm names
names_x = data.frame(unique(dff$exporter))
names_xg = data.frame(unique(dff$exporter_group))
names_m = data.frame(unique(dff$importer))
names_mg = data.frame(unique(dff$importer_group))

#Other edits
dff[exporter=="",]$exporter='UNKNOWN'
dff[exporter_group=="",]$exporter_group='UNKNOWN'
dff[importer=="",]$importer='UNKNOWN'
dff[importer_group=="",]$importer_group='UNKNOWN'
dff[port=="",]$port='UNKNOWN'
write.csv(dff, gzfile(paste0("../../data/agribusiness/clean/soybean_brazil.csv.gz")), row.names = FALSE)

#Export destination list
dfn = unique(dff, by = "destination")[, list(destination)]
write.csv(dfn, paste0("../../data/agribusiness/clean/soybean_brazil_destinations.csv"), row.names = FALSE)


####################### Brazil maize
folder = "../../data/agribusiness/raw/brazil-corn-v2.5.1-2024-01-17"
if(!dir.exists(folder)) {unzip(paste0(folder,".zip"), exdir = folder)}
df_m = fread(paste0(folder,"/brazil-corn-v2.5.1-2024-01-17.csv"))
unlink(folder, recursive=TRUE)

#Rename columns
df = df_m
setnames(df, c('year','country_of_production','municipality_trase_id','municipality','state','volume','fob','logistics_hub','port','exporter','exporter_group','importer','importer_group','country_of_destination'),
         c('year','country','county_id','county','state','equivalent_tonnes','fob_usd','hub','port','exporter','exporter_group','importer','importer_group','destination'))
df$product_type = 'Corn equivalents'
df = df[, list(year,country,state,county,county_id,equivalent_tonnes,fob_usd,hub,port,exporter,exporter_group,importer,importer_group,destination,product_type) ]
df[, commodity:='maize']

#Format county ID
df[,id_type:='county_known']
df = df %>% mutate(county_id = str_replace(county_id, "BR-", "BR"))
df[county_id=="BRXXXXXXX", id_type:='county_unknown_state_unknown']
share_county_known_fob = sum(df[id_type=='county_known',]$fob_usd)/sum(df$fob_usd)
share_county_known_vol = sum(df[id_type=='county_known',]$equivalent_tonnes)/sum(df$equivalent_tonnes)

#Format firm names
df[like(exporter_group,"BUNGE"), exporter_group:='BUNGE']
df[like(exporter_group,"CARGILL"), exporter_group:='CARGILL']
df[like(exporter_group,"AMAGGI"), exporter_group:='AMAGGI']
df[like(exporter_group,"LOUIS DREYFUS"), exporter_group:='LOUIS DREYFUS']
df[like(exporter_group,"COFCO"), exporter_group:='COFCO']

#Merge with other spatial units
dff = merge(df_u_b, df, by=c('state','county_id'), all.y=T)
share_non_merged = sum(dff[is.na(amc_id) & id_type=='county_known',]$equivalent_tonnes)/sum(dff$equivalent_tonnes)
df_u_bb = unique(df_u_b[, list(state, state_id, region)])
setnames(df_u_bb, old=c('state_id','region'), new=c('state_id_b','region_b'))
dff = merge(dff, df_u_bb, by=c('state'), all.x=T)
dff$state_id = dff$state_id_b
dff$region = dff$region_b
dff[, c('state_id_b','region_b') := c(NULL,NULL)]

#Firm names
names_x = data.frame(unique(dff$exporter))
names_xg = data.frame(unique(dff$exporter_group))
names_m = data.frame(unique(dff$importer))
names_mg = data.frame(unique(dff$importer_group))

#Other edits
dff[exporter=="",]$exporter='UNKNOWN'
dff[exporter_group=="",]$exporter_group='UNKNOWN'
dff[importer=="",]$importer='UNKNOWN'
dff[importer_group=="",]$importer_group='UNKNOWN'
dff[port=="",]$port='UNKNOWN'
write.csv(dff, gzfile(paste0("../../data/agribusiness/clean/maize_brazil.csv.gz")), row.names = FALSE)

#Export destination list
dfn = unique(dff, by = "destination")[, list(destination)]
write.csv(dfn, paste0("../../data/agribusiness/clean/maize_brazil_destinations.csv"), row.names = FALSE)


####################### Argentina soybean
folder = "../../data/agribusiness/raw/argentina-soy-v1.1.1-2024-01-17"
if(!dir.exists(folder)) {unzip(paste0(folder,".zip"), exdir = folder)}
df_m = fread(paste0(folder,"/argentina-soy-v1.1.1-2024-01-17.csv"))
unlink(folder, recursive=TRUE)

#Rename columns
df = df_m
setnames(df, c('year','country_of_production','department_of_production_trase_id','department_of_production','province_of_production','volume','fob','port_of_export','exporter','exporter_group','country_of_destination','product_type'),
         c('year','country','county_id','county','state','equivalent_tonnes','fob_usd','port','exporter','exporter_group','destination','product_type'))
df[,hub:='NA']
df = df[, list(year,country,state,county_id,county,equivalent_tonnes,fob_usd,hub,port,exporter,exporter_group,destination,product_type) ]
df[, commodity:='soybean']

#Format county ID
df[,id_type:='county_known']
df = df %>% mutate(county_id = str_replace(county_id, "AR-", "AR0"))
df[county_id=="AR0XXXXX" & state!='UNKNOWN'  & state!= 'IMPORTS + STOCK', id_type:='county_unknown_state_known']
df[county_id=="AR0XXXXX" & state=='UNKNOWN', id_type:='county_unknown_state_unknown']
df[county_id=="AR0XXXXX" & state=='IMPORTS + STOCK', id_type:='county_unknown_state_unknown']
df[county_id=="AR0XXXXX-STOCK", id_type:='county_unknown_state_unknown']
share_county_known_fob = sum(df[id_type=='county_known',]$fob_usd)/sum(df[state!= 'IMPORTS + STOCK',]$fob_usd)
share_county_known_vol = sum(df[id_type=='county_known',]$equivalent_tonnes)/sum(df[state!= 'IMPORTS + STOCK',]$equivalent_tonnes)

#Format firm names
df[like(exporter_group,"BUNGE"), exporter_group:='BUNGE']
df[like(exporter_group,"CARGILL"), exporter_group:='CARGILL']
df[like(exporter_group,"AMAGGI"), exporter_group:='AMAGGI']
df[like(exporter_group,"LOUIS DREYFUS"), exporter_group:='LOUIS DREYFUS']
df[like(exporter_group,"COFCO"), exporter_group:='COFCO']

#Merge with other spatial units
dff = merge(df_u_a, df, by=c('state','county_id'), all.y=T)
share_non_merged = sum(dff[is.na(state_id) & id_type=='county_known',]$equivalent_tonnes)/sum(dff[id_type=='county_known',]$equivalent_tonnes)
df_u_aa = unique(df_u_a[, list(state, state_id, region)])
setnames(df_u_aa, old=c('state_id','region'), new=c('state_id_a','region_a'))
dff = merge(dff, df_u_aa, by=c('state'), all.x=T)
dff$state_id = dff$state_id_a
dff$region = dff$region_a
dff[, c('state_id_a','region_a') := c(NULL,NULL)]

#Firm names
names_x = data.frame(unique(dff$exporter))
names_xg = data.frame(unique(dff$exporter_group))

#Other edits
dff[exporter=="",]$exporter='UNKNOWN'
dff[exporter_group=="",]$exporter_group='UNKNOWN'
dff[port=="",]$port='UNKNOWN'
write.csv(dff, gzfile(paste0("../../data/agribusiness/clean/soybean_argentina.csv.gz")), row.names = FALSE)

#Export destination list
dfn = unique(dff, by = "destination")[, list(destination)]
write.csv(dfn, paste0("../../data/agribusiness/clean/soybean_argentina_destinations.csv"), row.names = FALSE)


############################################### Define destination blocs

dfn = fread("../../data/agribusiness/raw/destinations_crosswalk.csv")[, list(destination,destination_bloc)]

df = fread("../../data/agribusiness/clean/beef_brazil.csv.gz")
df = merge(df, dfn, by='destination', all.x=T)
write.csv(df, gzfile(paste0("../../data/agribusiness/clean/beef_brazil.csv.gz")), row.names = FALSE)

df = fread("../../data/agribusiness/clean/soybean_brazil.csv.gz")
df = merge(df, dfn, by='destination', all.x=T)
write.csv(df, gzfile(paste0("../../data/agribusiness/clean/soybean_brazil.csv.gz")), row.names = FALSE)

df = fread("../../data/agribusiness/clean/maize_brazil.csv.gz")
df = merge(df, dfn, by='destination', all.x=T)
write.csv(df, gzfile(paste0("../../data/agribusiness/clean/maize_brazil.csv.gz")), row.names = FALSE)

df = fread("../../data/agribusiness/clean/soybean_argentina.csv.gz")
df = merge(df, dfn, by='destination', all.x=T)
write.csv(df, gzfile(paste0("../../data/agribusiness/clean/soybean_argentina.csv.gz")), row.names = FALSE)



############################################### Export hubs and ports

########### Brazil
df = fread("../../data/landuse/clean/geographicunits_brazil.csv.gz")
df_u = df[, list(county, county_id, amc_id, microregion_id, mesoregion_id, state_id, state, region)]

#Beef
df = fread("../../data/agribusiness/clean/beef_brazil.csv.gz")[,list(hub, equivalent_tonnes)]
df = df[, lapply(.SD, sum), by=.(hub), .SDcols=c('equivalent_tonnes')]
df$share = round(df$equivalent_tonnes/sum(df$equivalent_tonnes),4)
df_x = df
df = merge(df_x, df_u, by.x='hub', by.y='county', all.x=T)
write.csv(df, paste0("../../data/agribusiness/clean/hubs_unmatched_beef_brazil.csv"), row.names = FALSE)

#Soybean
df = fread("../../data/agribusiness/clean/soybean_brazil.csv.gz")[,list(hub, equivalent_tonnes)]
df = df[, lapply(.SD, sum), by=.(hub), .SDcols=c('equivalent_tonnes')]
df$share = round(df$equivalent_tonnes/sum(df$equivalent_tonnes),4)
df_x = df
df = merge(df_x, df_u, by.x='hub', by.y='county', all.x=T)
write.csv(df, paste0("../../data/agribusiness/clean/hubs_unmatched_soybean_brazil.csv"), row.names = FALSE)

#Maize
df = fread("../../data/agribusiness/clean/maize_brazil.csv.gz")[,list(hub, equivalent_tonnes)]
df = df[, lapply(.SD, sum), by=.(hub), .SDcols=c('equivalent_tonnes')]
df$share = round(df$equivalent_tonnes/sum(df$equivalent_tonnes),4)
df_x = df
df = merge(df_x, df_u, by.x='hub', by.y='county', all.x=T)
write.csv(df, paste0("../../data/agribusiness/clean/hubs_unmatched_maize_brazil.csv"), row.names = FALSE)

########### Argentina
df = fread("../../data/landuse/clean/geographicunits_argentina.csv.gz")
df_u = df[, list(county, county_id, state_id, state, region)]
#Soybean
df = fread("../../data/agribusiness/clean/soybean_argentina.csv.gz")[,list(port, equivalent_tonnes)]
df = df[, lapply(.SD, sum), by=.(port), .SDcols=c('equivalent_tonnes')]
df$share = round(df$equivalent_tonnes/sum(df$equivalent_tonnes),4)
df_x = df
df = merge(df_x, df_u, by.x='port', by.y='county', all.x=T)
write.csv(df, paste0("../../data/agribusiness/clean/ports_unmatched_soybean_argentina.csv"), row.names = FALSE)

### At this stage, the matching of remaining un-matched hubs has to be done manually, producing:
#data/agribusiness/raw/hubs_matched_beef_brazil.csv
#data/agribusiness/raw/hubs_matched_soybean_brazil.csv
#data/agribusiness/raw/hubs_matched_maize_brazil.csv
#data/agribusiness/raw/ports_matched_soybean_argentina.csv

df = fread("../../data/agribusiness/raw/hubs_matched_beef_brazil.csv")
write.csv(df, paste0("../../data/agribusiness/clean/hubs_matched_beef_brazil.csv"), row.names = FALSE)

df = fread("../../data/agribusiness/raw/hubs_matched_soybean_brazil.csv")
write.csv(df, paste0("../../data/agribusiness/clean/hubs_matched_soybean_brazil.csv"), row.names = FALSE)

df = fread("../../data/agribusiness/raw/hubs_matched_maize_brazil.csv")
write.csv(df, paste0("../../data/agribusiness/clean/hubs_matched_maize_brazil.csv"), row.names = FALSE)

df = fread("../../data/agribusiness/raw/ports_matched_soybean_argentina.csv")
write.csv(df, paste0("../../data/agribusiness/clean/ports_matched_soybean_argentina.csv"), row.names = FALSE)







